Masthead

Linear mixed models with synthetic data in R

Below is an example of linear mixed models in R using synthetic data. This was adatped from:

Multilevel Modeling of Educational Data using R (Part 1)

The Code

The code below is heavily commented to help with seeing how it works. First, run each line of the code to see what happens. Then, try changing the data at the start of the file to see how well the linear mixed model finds the relationships in the data.

#################################################################
# Example of a linear mixed model using synthetic data
# Adapted from https://www.r-bloggers.com/multilevel-modeling-of-educational-data-using-r-part-1/

library(ggplot2)
library(lme4)

################################################################# 
# Setup groups of students with slightly different distributions

N <- 100 # Number of students per school
sigma <- 200 # standard deviation of the noise that is added to the data

# Setup the 5 independent datasets with different ranges of independent values
Independent1 <- runif(N, 10, 40) # 100 students from 10 to 40
Independent2 <- runif(N, 25, 55) # 100 students from 25 to 55
Independent3 <- runif(N, 40, 70)
Independent4 <- runif(N, 55, 85)
Independent5 <- runif(N, 70, 100)

# Setup 5 linear responses with different intercepts and slopes and with some noise
Response1 <- 20 + 0 * Independent1 + rnorm(N, 0, sigma) # intercept=20, slope=0
Response2 <- 40 + 10 * Independent2 + rnorm(N, 0, sigma) # intercept=40, slope=10
Response3 <- 60 + 20 * Independent3 + rnorm(N, 0, sigma)
Response4 <- 80 + 30 * Independent4 + rnorm(N, 0, sigma)
Response5 <- 100 + 40 * Independent5 + rnorm(N, 0, sigma)

# Create a vector with 100 "A"s, 100 "B"s, etc. to label each group
ID <- rep(LETTERS[1:5], each = N) # ID of the group each student is in

################################################################# 
# Move the response, idependent variable, and IDs into a data frame

# Create one array of all the independent values
Independent = c(Independent1, Independent2, Independent3, Independent4,Independent5) # create a single set of response values

# Create one array of all the dependent response values
Response = c(Response1, Response2, Response3, Response4, Response5) # create a single set of independent values (covariate)

# Combine the independent and response data into a data frame
FieldData <- data.frame(Independent=Independent,Response=Response, ID = ID)

# Plot the combined data
plot(FieldData$Independent,FieldData$Response)

################################################################# 
# Create a traditional linear model

LinearModel=lm(Response~Independent,data=FieldData)
#plot(LinearModel)
summary(LinearModel)

# Get the coeficients (fitted parameters) for the model so we can plot the model
LinearModelCoefs=LinearModel$coefficients
LinearModelIntercept=LinearModelCoefs[1]
LinearModelSlope=LinearModelCoefs[2]

# Plot the original data and the model
plot(FieldData$Independent,FieldData$Response)
abline(a=LinearModelIntercept,b=LinearModelSlope, col="red")

################################################################# 
# Create the linear mixed effects model

# Note that the only difference is the name "lmer" and that we have added
# that the Independent Model should have a "random effect" based on the ID values
LinearMixedModel <- lmer(Response ~ Independent + (Independent | ID), data = FieldData) # create our model with ID specified to group 1st level models
summary(LinearMixedModel)

################################################################# 
# Pull the B0s and B1s from the coeficients

TheCoefs=coef(LinearMixedModel) # get the structure with the coeficients in it

IDCoefs=TheCoefs$ID # Extract the coefcients modeled with IDs

B1s=IDCoefs$Independent
B0s=IDCoefs$`(Intercept)`

################################################################# 
# create a plot showing the groupings and the individual models for each group

# Plot the first original data group with the bounds of all the data
plot(Independent1,Response1,col='red',xlim=c(0,100),ylim=c(0,4500))

# Plot the other original data groups
points(Independent2,Response2,col="green")
points(Independent3,Response3,col="blue")
points(Independent4,Response4,col="purple")
points(Independent5,Response5,col="orange")

# Plot each of the lines for the components of the model
clip(-10,40, 0, 10000) # x1, x2, y1, y2
abline(a=B0s[1], b=B1s[1],col='black')

clip(20,55, 0, 10000) # x1, x2, y1, y2
abline(a=B0s[2], b=B1s[2],col='black')

clip(40,70, 0, 10000) # x1, x2, y1, y2
abline(a=B0s[3], b=B1s[3],col='black')

clip(55,85, 0, 10000) # x1, x2, y1, y2 abline(a=B0s[4], b=B1s[4],col='black') clip(70,100, 0, 10000) # x1, x2, y1, y2 abline(a=B0s[5], b=B1s[5],col='black')

© Copyright 2018 HSU - All rights reserved.